import xlrd
import re
import pandas as pd
# Names of the minor bean crops the questions are generated for.
beans = [
    '菜豆', '豇豆', '毛豆', '豌豆',
    '扁豆', '蚕豆', '黑豆', '绿豆',
]

# Module-level result lists. Each "*_new" list is the de-duplicated
# counterpart of the list declared just before it.
diseases = []  # diseases
diseases_new = []  # diseases (after de-duplication)

insects = []  # insect pests
insects_new = []  # insect pests (after de-duplication)

pathogens = []  # pathogens
pathogens_new = []  # pathogens (after de-duplication)

train_dataset = []  # training set
test_dataset = []  # test set

# Read the minor-bean disease and insect-pest data from the Excel workbook
# Patterns used to take a pathogen entry apart. The first one removes Latin
# letters, digits, commas, parentheses and single quotes (what remains starts
# with the Chinese name); the second one keeps only Latin letters, commas and
# spaces (the raw material for the English/Latin name).
_NON_CN_PATTERN = re.compile("[A-Za-z0-9,(,),']")
_EN_PATTERN = re.compile(r'[A-Za-z, ]')


def _cell_text(sheet, row, col, strip_spaces=False):
    """Return the text of one sheet cell, with an empty marker mapped to ''.

    The text is obtained by slicing ``str(sheet.cell(row, col))[6:-1]``.
    NOTE(review): this presumably strips a ``text:'`` prefix and the closing
    quote of xlrd's cell string form, so it only suits text cells -- confirm,
    and consider ``sheet.cell_value`` if other cell types can occur.

    Args:
        sheet: The xlrd sheet to read from.
        row: Zero-based row index.
        col: Zero-based column index.
        strip_spaces: When true, every ASCII space is removed from the text.

    Returns:
        The extracted text; a result that is just a lone ``'`` (what the
        slice leaves for a cell without text) is returned as ``''``.
    """
    text = str(sheet.cell(row, col))[6:-1]
    if strip_spaces:
        text = text.replace(' ', '')
    return '' if text == "'" else text


def _parse_pathogen(entry, infect):
    """Split one pathogen entry into Chinese name, Latin name and abbreviation.

    Args:
        entry: Text describing a single pathogen. An abbreviation, if any, is
            expected in parentheses after the names.
        infect: Infection-cycle text stored unchanged under ``'Infect'``.

    Returns:
        A dict with the keys ``'CN'``, ``'EN'``, ``'ABBR'`` and ``'Infect'``.
    """
    pathogen = {'CN': '', 'EN': '', 'ABBR': '', 'Infect': infect}
    # Chinese name: first space-separated token once Latin text is removed.
    pathogen['CN'] = _NON_CN_PATTERN.sub("", entry).split(' ')[0]
    if "(" in entry:
        # Text after the first "(" minus its last character. The last entry
        # of a multi-pathogen cell still carries one extra trailing character
        # (presumably the closing quote of the cell's string form), so a
        # remaining ")" is cut off as well.
        abbr = entry.split('(')[1][:-1]
        if ")" in abbr:
            abbr = abbr[:-1]
        pathogen['ABBR'] = abbr
    latin = ''.join(_EN_PATTERN.findall(entry))
    if pathogen['ABBR']:
        # The abbreviation's letters sit at the end of the Latin text; drop
        # as many trailing characters as the abbreviation is long.
        latin = latin[:-len(pathogen['ABBR'])]
    pathogen['EN'] = latin
    return pathogen


def _append_unique_by_cn(records, unique):
    """Append to *unique* every record whose ``'CN'`` is not in it yet.

    Order is preserved and the first record seen for a name wins.
    """
    seen = {record['CN'] for record in unique}
    for record in records:
        if record['CN'] not in seen:
            seen.add(record['CN'])
            unique.append(record)


def ReadExcel(path=r"D:\study\毕设\data\杂豆病虫害.xls"):
    """Load disease, insect-pest and pathogen records from the workbook.

    Rows of the first sheet (starting at row index 1) are classified by the
    text of column 0: rows containing "病害" become disease records (and
    yield pathogen records from columns 6 and 7), rows containing "虫害"
    become insect records. Results are appended to the module-level lists
    ``diseases``, ``insects`` and ``pathogens``; afterwards the
    de-duplicated (by ``'CN'``) records are appended to ``diseases_new``,
    ``insects_new`` and ``pathogens_new``.

    Args:
        path: Location of the ``.xls`` workbook. The default is the path the
            script has always used, written as a raw string so that the
            backslashes are not parsed as (invalid) escape sequences.
    """
    sheet = xlrd.open_workbook(path).sheet_by_index(0)

    for row in range(1, sheet.nrows):
        category = str(sheet.cell(row, 0))
        if "病害" in category:
            diseases.append({
                'CN': _cell_text(sheet, row, 1),
                'EN': _cell_text(sheet, row, 2),
                'AN': _cell_text(sheet, row, 3),
                'Brief': _cell_text(sheet, row, 4),
                'Symptom': _cell_text(sheet, row, 5),
                'Reason': _cell_text(sheet, row, 8, strip_spaces=True),
                'Method': _cell_text(sheet, row, 11, strip_spaces=True),
            })

            infect = _cell_text(sheet, row, 7)
            raw = str(sheet.cell(row, 6))
            if "：" in raw:
                # Several pathogens: listed after the full-width colon and
                # separated by "、".
                entries = raw.split('：')[1].split('、')
            else:
                # Single pathogen. NOTE(review): the [9:-1] slice skips three
                # more leading characters than the other columns do --
                # presumably a fixed prefix in the sheet; confirm.
                entries = [raw[9:-1]]
            for entry in entries:
                pathogens.append(_parse_pathogen(entry, infect))
        elif "虫害" in category:
            insects.append({
                'CN': _cell_text(sheet, row, 1),
                'EN': _cell_text(sheet, row, 2),
                'AN': _cell_text(sheet, row, 3),
                'Brief': _cell_text(sheet, row, 4),
                'Symptom': _cell_text(sheet, row, 5),
                'Feature': _cell_text(sheet, row, 9, strip_spaces=True),
                'Lifestyle': _cell_text(sheet, row, 10, strip_spaces=True),
                'Reason': _cell_text(sheet, row, 8, strip_spaces=True),
                'Method': _cell_text(sheet, row, 11, strip_spaces=True),
            })

    # Remove duplicates (same 'CN'), keeping the first occurrence.
    _append_unique_by_cn(diseases, diseases_new)
    _append_unique_by_cn(insects, insects_new)
    _append_unique_by_cn(pathogens, pathogens_new)

# Question templates, in the exact order the questions are emitted.
# Each row is (label, sources, templates): for every source in turn, every
# name of that source is substituted for "{}" in every template.
# Sources: 'bean', 'disease', 'insect', 'pathogen'.
_TRAIN_TEMPLATES = (
    # 0: diseases of a bean crop
    (0, ('bean',),
     ("{}有哪些病害", "列举{}的病害", "{}有什么病害", "{}易患什么病")),
    # 1: insect pests of a bean crop
    (1, ('bean',),
     ("{}有哪些虫害", "列举{}的虫害", "{}有什么虫害", "{}易患什么虫害")),
    # 2: English name
    (2, ('disease', 'insect'),
     ("{}的英文名是什么", "{}的英语名字叫什么", "{}的英文怎么说", "{}在英语中怎么说")),
    # 3: alias
    (3, ('disease', 'insect'),
     ("{}的别名是什么", "{}还叫什么", "{}有哪些别名", "{}还可以称作什么")),
    # 4: brief introduction
    (4, ('disease', 'insect'),
     ("{}的简介是什么", "什么是{}", "介绍一下{}", "简单介绍{}")),
    # 5: damage symptoms (diseases and insects use different wordings)
    (5, ('disease',),
     ("{}的为害症状是什么", "{}有哪些为害症状", "根据什么症状可以判断出{}", "怎样根据为害症状识别出{}")),
    (5, ('insect',),
     ("{}的为害症状是什么", "{}有哪些症状", "如何判断{}", "怎样识别{}")),
    # 6: pathogen of a disease
    (6, ('disease',),
     ("{}的病原物是什么", "什么病原导致{}", "{}的发生是由于什么病原的侵染", "{}的病原物有哪些")),
    # 7: infection cycle of a pathogen
    (7, ('pathogen',),
     ("{}的侵染循环是什么", "{}如何进行侵染", "{}通过什么方式侵染", "{}的侵染方式是什么")),
    # 8: occurrence factors
    (8, ('disease', 'insect'),
     ("{}的发生因素是什么", "{}为什么会发生", "什么情况下{}容易发生", "{}的发生有什么规律")),
    # 9: morphological features
    (9, ('insect',),
     ("{}的形态特征是什么", "{}长什么样", "如何认出{}", "{}有什么形态特征")),
    # 10: living habits
    (10, ('insect',),
     ("{}的生活习性是什么", "{}有什么生活习性", "{}怎样生活", "{}如何生存下来")),
    # 11: control methods
    (11, ('disease', 'insect'),
     ("{}的防治方法是什么", "如何防治{}", "{}有哪些防治方法", "{}有什么有效的防治措施")),
    # 12: bean crops affected by a disease
    (12, ('disease',),
     ("{}有哪些杂豆易患", "{}是哪些杂豆的病害", "哪些杂豆易患{}", "列举易患{}的杂豆")),
    # 13: bean crops damaged by an insect
    (13, ('insect',),
     ("{}易为害哪些杂豆", "{}是哪些杂豆的虫害", "哪些杂豆易被{}为害", "列举易被{}为害的杂豆")),
    # 14: diseases caused by a pathogen
    (14, ('pathogen',),
     ("{}是什么病的病原物", "{}导致了什么病", "什么病的发生是由于{}的侵染", "{}是哪些病的病原物")),
)

# Same layout as _TRAIN_TEMPLATES, with one held-out wording per label.
_TEST_TEMPLATES = (
    (0, ('bean',), ("{}容易得哪些病",)),
    (1, ('bean',), ("{}容易得哪些虫害",)),
    (2, ('disease', 'insect'), ("{}用英语怎么说",)),
    (3, ('disease', 'insect'), ("{}的其他名字",)),
    (4, ('disease', 'insect'), ("{}的简要描述",)),
    (5, ('disease', 'insect'), ("{}被为害后会怎么样",)),
    (6, ('disease',), ("{}是被什么病原物侵染的",)),
    (7, ('pathogen',), ("{}怎么侵染",)),
    (8, ('disease', 'insect'), ("什么条件容易导致{}的发生",)),
    (9, ('insect',), ("{}在形态上有什么特征",)),
    (10, ('insect',), ("{}在生活中有什么习性",)),
    (11, ('disease', 'insect'), ("通过什么方法可以防治{}",)),
    (12, ('disease',), ("有什么杂豆容易得{}",)),
    (13, ('insect',), ("有什么杂豆容易被{}为害",)),
    (14, ('pathogen',), ("什么病是被{}侵染的",)),
)


def _append_questions(target, spec, names_by_source):
    """Expand a template table into question dicts appended to *target*.

    Emission order is: table row, then source, then name, then template, so
    all wordings for one name stay together.
    """
    for label, sources, templates in spec:
        for source in sources:
            for name in names_by_source[source]:
                for template in templates:
                    target.append({'question': template.format(name),
                                   'label': label})


def CreateDataset():
    """Generate the labelled question datasets from the parsed records.

    Reads the module-level ``beans``, ``diseases_new``, ``insects_new`` and
    ``pathogens_new`` and appends ``{'question': str, 'label': int}`` dicts
    to the module-level ``train_dataset`` (four wordings per name and label)
    and ``test_dataset`` (one different wording per name and label).

    Labels: 0 bean diseases, 1 bean pests, 2 English name, 3 alias, 4 brief
    introduction, 5 damage symptoms, 6 pathogen, 7 infection cycle,
    8 occurrence factors, 9 morphological features, 10 living habits,
    11 control methods, 12 beans affected by a disease, 13 beans damaged by
    an insect, 14 diseases caused by a pathogen.
    """
    names_by_source = {
        'bean': beans,
        'disease': [record['CN'] for record in diseases_new],
        'insect': [record['CN'] for record in insects_new],
        'pathogen': [record['CN'] for record in pathogens_new],
    }
    # Build the training set (train_dataset).
    _append_questions(train_dataset, _TRAIN_TEMPLATES, names_by_source)
    # Build the test set (test_dataset).
    _append_questions(test_dataset, _TEST_TEMPLATES, names_by_source)

if __name__ == '__main__':
    # Parse the workbook, then expand the parsed names into labelled questions.
    ReadExcel()
    CreateDataset()
    train_df = pd.DataFrame(train_dataset)
    test_df = pd.DataFrame(test_dataset)
    # Optional: export the two splits as separate files.
    # train_df.to_csv(r'D:\study\毕设\data\question_train_dataset_large.csv',index=False)
    # test_df.to_csv(r'D:\study\毕设\data\question_test_dataset_large.csv', index=False)
    # Export the combined dataset (training rows followed by test rows).
    # The previous code built this combination but then wrote only the test
    # split to this file, and it extended train_dataset in place through an
    # alias; both are avoided here.
    dataset_df = pd.concat([train_df, test_df], ignore_index=True)
    dataset_df.to_csv(r'D:\study\毕设\data\question_dataset_large.csv', index=False)